library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(ggplot2)
library(ggmap)
## Warning: 程辑包'ggmap'是用R版本4.3.2 来建造的
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(plotly)
##
## 载入程辑包:'plotly'
##
## The following object is masked from 'package:ggmap':
##
## wind
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(mgcv)
## 载入需要的程辑包:nlme
##
## 载入程辑包:'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
## This is mgcv 1.9-0. For overview type 'help("mgcv-package")'.
library(gganimate)
## Warning: 程辑包'gganimate'是用R版本4.3.2 来建造的
knitr::opts_chunk$set(
echo = TRUE,
fig.width = 6,
fig.asp = 0.6,
out.width = "90%"
)
theme_set(theme_minimal() + theme(legend.position = "bottom"))
options(
ggplot2.continuous.colour = "viridis",
ggplot2.continuous.fill = "viridis"
)
scale_colour_discrete = scale_color_viridis_d
scale_fill_discrete = scale_fill_viridis_d
Preliminary data import and cleaning:
rats_df =
read_csv("./data/rat_sightings.csv", ) %>%
janitor::clean_names() %>%
select(unique_key, created_date, location_type, incident_zip, city, borough, latitude, longitude, location) %>%
separate(created_date, into = c("created_date", "time", "am_pm"), sep = " ") %>%
separate(created_date, into = c("month", "day", "year"), sep = "/")
## Rows: 232417 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (26): Created Date, Closed Date, Agency, Agency Name, Complaint Type, De...
## dbl (5): Unique Key, X Coordinate (State Plane), Y Coordinate (State Plane)...
## lgl (7): Vehicle Type, Taxi Company Borough, Taxi Pick Up Location, Bridge ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(rats_df)
## # A tibble: 6 × 13
## unique_key month day year time am_pm location_type incident_zip city
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 56242499 12 13 2022 06:52:10 PM 3+ Family Apt.… 10472 BRONX
## 2 56242808 12 13 2022 02:28:55 PM Other (Explain… 10025 NEW …
## 3 57383018 04 21 2023 10:03:11 AM 3+ Family Apt.… 10026 NEW …
## 4 58545179 08 18 2023 03:37:48 AM 1-2 Family Dwe… 11222 BROO…
## 5 56610428 01 24 2023 11:45:24 PM Other (Explain… 11204 BROO…
## 6 58788175 09 11 2023 11:52:46 PM 3+ Family Apt.… 11238 BROO…
## # ℹ 4 more variables: borough <chr>, latitude <dbl>, longitude <dbl>,
## # location <chr>
register_stadiamaps(key = "7fe0e792-45a1-4318-8bea-cd0eb5c44b18")
nyc_map <- get_stadiamap(bbox = c(left = -74.26, bottom = 40.495, right = -73.7, top = 40.92),zoom=10)
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
ggmap(nyc_map)
rats_covid=rats_df|>
filter(month %in% "12",
year%in% "2019")|>
filter(borough != "Unspecified")
ggmap(nyc_map)+
geom_point(aes(x=longitude,y=latitude,color=borough),alpha=0.3,data=rats_covid)+
theme_minimal() +
labs(title = "Population Density Map before Covid")
## Warning: Removed 12 rows containing missing values (`geom_point()`).
rats_march=rats_df|>
filter(month %in% "01",
year%in% "2023")|>
filter(borough != "Unspecified")
ggmap(nyc_map)+
geom_point(aes(x=longitude,y=latitude,color=borough),alpha=0.3,data=rats_march)+
theme_minimal() +
labs(title = "Population Density Map as of March 2023")
## Warning: Removed 4 rows containing missing values (`geom_point()`).
rats_november=rats_df|>
filter(month %in% "11",
year%in% "2023")|>
filter(borough != "Unspecified")
ggmap(nyc_map)+
geom_point(aes(x=longitude,y=latitude,color=borough),alpha=0.3,data=rats_november)+
theme_minimal() +
labs(title = "Population Density Map as of November 2023")
## Warning: Removed 12 rows containing missing values (`geom_point()`).
## Interpretation of the Graph >>The bar graph above illustrates
the rat density before Covid, March 2023 and November 2023 over the
boroughs in New York City. Between December, 2019 and March 2023, the
graph shows significant difference in rat density especially in
Manhattan. Between March and November 2023, the rat czar program was
implemented on April 2023. These two graphs shows the comparison in
density one month before the program started and the most recent density
(as of November 2023). There are no significant change in density
visible from the graph except it seems less dense in staten island.
rats_2023=rats_df|>
filter(year %in% "2023")|>
drop_na()|>
ggplot(aes(x=longitude,y=latitude))+
theme_minimal() +
labs(title = "Population Density Map as of 2023")+
geom_point(aes(color=borough,alpha=0.3))+
facet_wrap(~ month, scales = "fixed", ncol = 6)
ggplotly(rats_2023)
A graph showing the density change throughout the year of 2023.While some density change is visible through the graph, it is not significant enough to show major changes made by the rat czar program. The rat problem still seems severe through out the entire new york city. The density change maybe explained by other factors such as weather change and rats’ breeding season.
rats_allyear=rats_df|>
mutate(year=as.numeric(year))|>
na.omit()|>
filter(borough != "Unspecified")
rats_allyeargraph<-ggmap(nyc_map)+
geom_point(aes(x=longitude,y=latitude,color=borough),alpha=0.3,data=rats_allyear)
labs(title = "Population Density Map")+
geom_point(aes(color=borough,alpha=0.3),na.rm=TRUE)
## NULL
map_with_animation <- rats_allyeargraph+
transition_time(year) +
ggtitle('Year: {frame_time}',
subtitle = 'Frame {frame} of {nframes}')
num_years <- max(rats_allyear$year) - min(rats_allyear$year) + 1
animate(map_with_animation, nframes = num_years, fp=1)
Grapj showing rat density change in the city of New York Over the years
ANOVA test on location_type:
loc_df =
rats_df %>%
filter(year == "2018" | year == "2019" | year == "2020" | year == "2021" | year == "2022" | year == "2023") %>%
select(year, location_type) %>%
group_by(year, location_type) %>%
summarize(sightings = n()) %>%
arrange(desc(sightings)) %>%
filter(location_type == "3+ Family Apt. Building" | location_type == "1-2 Family Dwelling" | location_type == "Commercial Building" | location_type == "Construction Site")
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
loc_df %>%
ggplot(aes(x = year, y = sightings, fill = location_type)) +
geom_area(aes(group = location_type, alpha = 0.8)) +
theme_bw() +
labs(title = "Rat sightings by location type 2018-2023")